NOTE: from level 4/5 onwards I will execute most steps locally, as the datasets are not as large anymore.

1 Introduction

l Here, we will work on the level 5 of the CD4 T cell, which entails:

  • Dividing CD9+ cluster into 2.
  • Include a subset of memory B cells to assess whether we can find mem B-derived PC.
  • Subset LZ so that it does not introduce that much variability.

1.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)

1.2 Parameters

# Paths
path_to_obj <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/4-level_4/PC/PC_GC.rds"
path_to_mbc <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_3/NBC_MBC/NBC_MBC_clustered_level_3_with_pre_freeze.rds"
path_to_save <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/PC/PC_subseted_integrated_level_5.rds"


# Colors
color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", "#5A5156", "#AA0DFE",   
                    "#F8A19F", "#F7E1A0", "#1C8356", "#FEAF16", "#822E1C", "#C4451C",   
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", "#FBE426", "#16FF32", 
                    "black",   "#3283FE", "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",   
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB", "#822E1C", "coral2", 
                    "#1CFFCE", "#1CBE4F", "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",   
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD", "#FEAF16", "#683B79",   
                    "#B10DA1", "#1C7F93", "#F8A19F", "dark orange", "#FEAF16", "#FBE426",  
                    "Brown")

# Functions
source("~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/bin/utils.R")


downsample_cells <- function(seurat, var, n_cells) {
  seurat$barcode <- colnames(seurat)
  proportions <- table(seurat[[var]]) / ncol(seurat)
  cells_list <- purrr::map2(names(proportions), proportions, function(x, y) {
    n <- n_cells * y
    df <- seurat@meta.data[seurat@meta.data[[var]] == x, ]
    cells <- df %>%
      dplyr::sample_n(n, replace = FALSE) %>%
      dplyr::pull(barcode)
    cells
  })
  selected_cells <- unlist(cells_list)
  seurat_sub <- subset(seurat, cells = selected_cells)
  seurat_sub
}

1.3 Load data

# Load and subset class-switch memory B cells
mem_b <- readRDS(path_to_mbc)
DimPlot(mem_b)

FeaturePlot(mem_b, c("CD27", "IGHA1"))

mem_b <- subset(mem_b, idents = "1")
set.seed(123)
mem_b <- downsample_cells(mem_b, var = "gem_id", n_cells = 500)


# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 37378 features across 15473 samples within 1 assay 
## Active assay: RNA (37378 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
DimPlot(seurat, cols = color_palette, pt.size = 0.2)

2 Divide CD9 cluster

seurat <- FindSubCluster(
  seurat,
  cluster = "CD9",
  graph.name = "RNA_snn",
  subcluster.name = "cd9_subclusters",
  resolution = 0.15
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1422
## Number of edges: 46517
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8708
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "cd9_subclusters", cols = color_palette)

p <- seurat@reductions$umap@cell.embeddings %>%
  as.data.frame() %>%
  mutate(cluster = seurat$cd9_subclusters) %>%
  filter(str_detect(cluster, "CD9")) %>%
  ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
    geom_point(size = 0.1) +
    theme_classic()
p

seurat <- subset(seurat, cd9_subclusters != "CD9_2")

# DEA
Idents(seurat) <- "cd9_subclusters"
dea <- FindMarkers(
  seurat,
  ident.1 = "CD9_1",
  ident.2 = "CD9_0",
  logfc.threshold = 0.25,
  only.pos = FALSE
)
dea <- dea %>%
  rownames_to_column("gene") %>%
  arrange(desc(avg_log2FC))
DT::datatable(dea)
# Visualize markers
markers_interest <- c("XBP1", "JCHAIN", "IGHM", "MIR155HG")
feat_plots <- purrr::map(markers_interest, function(x) {
  p <- FeaturePlot(seurat, x) +
    scale_color_viridis_c(option = "magma")
  p
})
feat_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

3 Subset LZ GC B cells

seurat <- FindNeighbors(seurat, reduction = "harmony", dims = 1:30)
seurat <- FindSubCluster(
  seurat,
  cluster = "BCL2A1",
  graph.name = "RNA_snn",
  subcluster.name = "lz_subclusters",
  resolution = 0.4
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7466
## Number of edges: 258681
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8067
## Number of communities: 8
## Elapsed time: 1 seconds
DimPlot(seurat, group.by = "lz_subclusters", cols = color_palette)

p <- seurat@reductions$umap@cell.embeddings %>%
  as.data.frame() %>%
  mutate(cluster = seurat$lz_subclusters) %>%
  filter(str_detect(cluster, "BCL2A1_2")) %>%
  ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
    geom_point(size = 0.1) +
    theme_classic()
p

As we can see, we will subset to cluster “BCL2A1_2”:

excluded_clusters <- c("BCL2A1_0", "BCL2A1_1", "BCL2A1_3", "BCL2A1_4",
                       "BCL2A1_5", "BCL2A1_6")
selected_cells <- colnames(seurat)[!(seurat$lz_subclusters %in% excluded_clusters)]
seurat <- subset(seurat, cells = selected_cells)
DimPlot(seurat, group.by = "lz_subclusters", cols = color_palette)

4 Include class-switched memory B cells and reprocess

seurat <- subset(
  seurat,
  cells = colnames(seurat)[Idents(seurat) != "Histone like"]
)
DimPlot(seurat, group.by = "lz_subclusters", cols = color_palette)

mem_b$annotation_level_5 <- "class-switched MBC"
seurat$annotation_level_5 <- seurat$lz_subclusters
seurat_merged <- merge(x = seurat, y = mem_b)
seurat_list <- SplitObject(seurat_merged, split.by = "assay")
seurat_list <- seurat_list[c("3P", "multiome")]
seurat_list <- purrr::map(
  seurat_list,
  FindVariableFeatures,
  nfeatures = 5000
)
hvg <- purrr::map(seurat_list, VariableFeatures)
shared_hvg <- intersect(hvg$`3P`, hvg$multiome)
# ElbowPlot(seurat_merged, reduction = "harmony")
seurat_merged <- seurat_merged %>%
  ScaleData(features = shared_hvg) %>%
  RunPCA(features = shared_hvg) %>%
  RunHarmony(group.by.vars = "assay", reduction ="pca", dims = 1:12) %>%
  RunUMAP(reduction = "harmony", dims = 1:12)
Idents(seurat_merged) <- "annotation_level_5"
DimPlot(seurat_merged, cols = color_palette)

DimPlot(seurat_merged, cols = color_palette, group.by = "assay")

As we can see, there is a subcluster in “GC Derived precursor 3” that is multiome-specific. Thus, we will exclude it:

seurat_merged <- FindNeighbors(seurat_merged, reduction = "harmony", dims = 1:12)
seurat_merged <- FindSubCluster(
  seurat_merged,
  cluster = "GC Derived precursor 3",
  graph.name = "RNA_snn",
  subcluster.name = "multiome_specific",
  resolution = 0.1
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 889
## Number of edges: 23089
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9159
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(seurat_merged, group.by = "multiome_specific", cols = color_palette)

seurat_merged <- subset(
  seurat_merged,
  multiome_specific != "GC Derived precursor 3_1"
)
DimPlot(seurat_merged, group.by = "multiome_specific", cols = color_palette)

# Reprocess
seurat_list <- SplitObject(seurat_merged, split.by = "assay")
seurat_list <- seurat_list[c("3P", "multiome")]
seurat_list <- purrr::map(
  seurat_list,
  FindVariableFeatures,
  nfeatures = 5000
)
hvg <- purrr::map(seurat_list, VariableFeatures)
shared_hvg <- intersect(hvg$`3P`, hvg$multiome)
# ElbowPlot(seurat_merged, reduction = "harmony")
seurat_merged <- seurat_merged %>%
  ScaleData(features = shared_hvg) %>%
  RunPCA(features = shared_hvg) %>%
  RunHarmony(group.by.vars = "assay", reduction ="pca", dims = 1:20) %>%
  RunUMAP(reduction = "harmony", dims = 1:20)
DimPlot(seurat_merged, cols = color_palette)

5 Create input shiny app

input_shiny <- seurat2shiny(seurat_merged, slot = "data", reduction = "umap")

6 Save

saveRDS(seurat_merged, path_to_save)
saveRDS(
  input_shiny$expression,
  "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/PC/PC_expression_to_shiny_app_level_5.rds")
saveRDS(
  input_shiny$metadata,
  "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/PC/PC_metadata_to_shiny_app_level_5.rds")